import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%config InlineBackend.figure_formats = ['svg'] # makes everything svg by default
%matplotlib inline
import seaborn as sns
sns.set_theme() # affects all matplotlib and seaborn plots
plt.style.use('ggplot')
import plotly.express as px
import plotly.graph_objs as go
import plotly.io as pio
pio.templates.default = "ggplot2"
pio.renderers.default = "notebook"
# injects plotly.js into the notebook for offline plotly
# but only works for the first save, for some reason
from fredapi import Fred
fred = Fred(api_key="ee01755ad12aff2d809e022069706883")
An Excel workbook with multiple worksheets. Each worksheet holds the time series of mean forecasts for the level of a different variable. The first two columns list the year and quarter in which the survey was conducted. The remaining columns give the mean forecasts for all quarterly and annual horizons, as described below ~ SPF Documentation
the forecasters provide their projections for the next five quarters and for the current and following years
What does this mean?
spf_unrate = (
pd.read_excel(
"ds/us/survey_of_professional_forecasters_mean_responses.xlsx"
)
)
spf_unrate
| YEAR | QUARTER | UNEMP1 | UNEMP2 | UNEMP3 | UNEMP4 | UNEMP5 | UNEMP6 | UNEMPA | UNEMPB | UNEMPC | UNEMPD | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1968 | 4 | 3.5974 | 3.6218 | 3.8359 | 4.0231 | 3.9910 | 3.9397 | NaN | NaN | NaN | NaN |
| 1 | 1969 | 1 | 3.4000 | 3.5656 | 3.7738 | 3.8525 | 3.8623 | NaN | NaN | NaN | NaN | NaN |
| 2 | 1969 | 2 | 3.3036 | 3.4607 | 3.6446 | 3.8196 | 3.9111 | NaN | NaN | NaN | NaN | NaN |
| 3 | 1969 | 3 | 3.5036 | 3.6375 | 3.8214 | 3.9304 | 3.9875 | NaN | NaN | NaN | NaN | NaN |
| 4 | 1969 | 4 | 3.7055 | 3.8382 | 4.1509 | 4.4127 | 4.4036 | 4.3473 | NaN | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 211 | 2021 | 3 | 5.9025 | 5.3207 | 4.9323 | 4.6527 | 4.4176 | 4.2484 | 5.5852 | 4.3777 | 3.9636 | 3.9056 |
| 212 | 2021 | 4 | 5.1027 | 4.5741 | 4.3186 | 4.1193 | 3.9808 | 3.8864 | 5.4466 | 4.0762 | 3.7790 | 3.8033 |
| 213 | 2022 | 1 | 4.2019 | 3.9308 | 3.7532 | 3.6401 | 3.5942 | 3.5521 | 3.7336 | 3.5308 | 3.5929 | 3.7011 |
| 214 | 2022 | 2 | 3.8000 | 3.6034 | 3.5518 | 3.5648 | 3.6089 | 3.6557 | 3.6272 | 3.6860 | 3.9116 | 3.9370 |
| 215 | 2022 | 3 | 3.6000 | 3.5676 | 3.6640 | 3.7953 | 3.9310 | 4.0870 | 3.6575 | 4.0235 | 4.0518 | 4.0085 |
216 rows × 12 columns
spf_unrate["PREDICTION_PERFORMED"] = spf_unrate['YEAR'].astype(str) + 'Q' + spf_unrate['QUARTER'].astype(str)
spf_unrate
| YEAR | QUARTER | UNEMP1 | UNEMP2 | UNEMP3 | UNEMP4 | UNEMP5 | UNEMP6 | UNEMPA | UNEMPB | UNEMPC | UNEMPD | PREDICTION_PERFORMED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1968 | 4 | 3.5974 | 3.6218 | 3.8359 | 4.0231 | 3.9910 | 3.9397 | NaN | NaN | NaN | NaN | 1968Q4 |
| 1 | 1969 | 1 | 3.4000 | 3.5656 | 3.7738 | 3.8525 | 3.8623 | NaN | NaN | NaN | NaN | NaN | 1969Q1 |
| 2 | 1969 | 2 | 3.3036 | 3.4607 | 3.6446 | 3.8196 | 3.9111 | NaN | NaN | NaN | NaN | NaN | 1969Q2 |
| 3 | 1969 | 3 | 3.5036 | 3.6375 | 3.8214 | 3.9304 | 3.9875 | NaN | NaN | NaN | NaN | NaN | 1969Q3 |
| 4 | 1969 | 4 | 3.7055 | 3.8382 | 4.1509 | 4.4127 | 4.4036 | 4.3473 | NaN | NaN | NaN | NaN | 1969Q4 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 211 | 2021 | 3 | 5.9025 | 5.3207 | 4.9323 | 4.6527 | 4.4176 | 4.2484 | 5.5852 | 4.3777 | 3.9636 | 3.9056 | 2021Q3 |
| 212 | 2021 | 4 | 5.1027 | 4.5741 | 4.3186 | 4.1193 | 3.9808 | 3.8864 | 5.4466 | 4.0762 | 3.7790 | 3.8033 | 2021Q4 |
| 213 | 2022 | 1 | 4.2019 | 3.9308 | 3.7532 | 3.6401 | 3.5942 | 3.5521 | 3.7336 | 3.5308 | 3.5929 | 3.7011 | 2022Q1 |
| 214 | 2022 | 2 | 3.8000 | 3.6034 | 3.5518 | 3.5648 | 3.6089 | 3.6557 | 3.6272 | 3.6860 | 3.9116 | 3.9370 | 2022Q2 |
| 215 | 2022 | 3 | 3.6000 | 3.5676 | 3.6640 | 3.7953 | 3.9310 | 4.0870 | 3.6575 | 4.0235 | 4.0518 | 4.0085 | 2022Q3 |
216 rows × 13 columns
spf_unrate = pd.melt(
spf_unrate,
id_vars = "PREDICTION_PERFORMED",
value_vars = ["UNEMP1", "UNEMP2", "UNEMP3", "UNEMP4", "UNEMP4", "UNEMP5"],
var_name = "PREDICTION_QUARTER_LAG",
value_name = "UNRATE"
)
spf_unrate
| PREDICTION_PERFORMED | PREDICTION_QUARTER_LAG | UNRATE | |
|---|---|---|---|
| 0 | 1968Q4 | UNEMP1 | 3.5974 |
| 1 | 1969Q1 | UNEMP1 | 3.4000 |
| 2 | 1969Q2 | UNEMP1 | 3.3036 |
| 3 | 1969Q3 | UNEMP1 | 3.5036 |
| 4 | 1969Q4 | UNEMP1 | 3.7055 |
| ... | ... | ... | ... |
| 1291 | 2021Q3 | UNEMP5 | 4.4176 |
| 1292 | 2021Q4 | UNEMP5 | 3.9808 |
| 1293 | 2022Q1 | UNEMP5 | 3.5942 |
| 1294 | 2022Q2 | UNEMP5 | 3.6089 |
| 1295 | 2022Q3 | UNEMP5 | 3.9310 |
1296 rows × 3 columns
spf_unrate["PREDICTION_QUARTER_LAG"] = (
spf_unrate["PREDICTION_QUARTER_LAG"]
.str[-1]
.astype(int)
)
spf_unrate
| PREDICTION_PERFORMED | PREDICTION_QUARTER_LAG | UNRATE | |
|---|---|---|---|
| 0 | 1968Q4 | 1 | 3.5974 |
| 1 | 1969Q1 | 1 | 3.4000 |
| 2 | 1969Q2 | 1 | 3.3036 |
| 3 | 1969Q3 | 1 | 3.5036 |
| 4 | 1969Q4 | 1 | 3.7055 |
| ... | ... | ... | ... |
| 1291 | 2021Q3 | 5 | 4.4176 |
| 1292 | 2021Q4 | 5 | 3.9808 |
| 1293 | 2022Q1 | 5 | 3.5942 |
| 1294 | 2022Q2 | 5 | 3.6089 |
| 1295 | 2022Q3 | 5 | 3.9310 |
1296 rows × 3 columns
def calc_period(spf_unrate):
QUARTER_PREDICTION_PERFORMED = spf_unrate["PREDICTION_PERFORMED"].str[-1].astype(int)
PREDICTION_QUARTER_LAG = spf_unrate["PREDICTION_QUARTER_LAG"]
year = spf_unrate["PREDICTION_PERFORMED"].str[0:4].astype(int)
my_sum = QUARTER_PREDICTION_PERFORMED + PREDICTION_QUARTER_LAG
period = (my_sum-1)%4 + 1
# if sum > 4
year = year + (my_sum - 1)//4
# +1 if my_sum = [4, 7]
# +2 if my_sum = [9, 13]
return year.astype(str) + "Q" + period.astype(str)
spf_unrate["PERIOD"] = calc_period(spf_unrate)
spf_unrate = spf_unrate.set_index("PERIOD")
spf_unrate = spf_unrate.drop("PREDICTION_PERFORMED", axis=1)
spf_unrate
| PREDICTION_QUARTER_LAG | UNRATE | |
|---|---|---|
| PERIOD | ||
| 1969Q1 | 1 | 3.5974 |
| 1969Q2 | 1 | 3.4000 |
| 1969Q3 | 1 | 3.3036 |
| 1969Q4 | 1 | 3.5036 |
| 1970Q1 | 1 | 3.7055 |
| ... | ... | ... |
| 2022Q4 | 5 | 4.4176 |
| 2023Q1 | 5 | 3.9808 |
| 2023Q2 | 5 | 3.5942 |
| 2023Q3 | 5 | 3.6089 |
| 2023Q4 | 5 | 3.9310 |
1296 rows × 2 columns
Starting from the earliest year of SPF (Why unnecessarily load everything?)
earliest_year = spf_unrate.index[0][0:4]
series = [
"GDP",
"UNRATE"
]
fred_data = (
pd.DataFrame(
{series:fred.get_series(
series,
observation_start = earliest_year
) for series in series}
)
)
fred_data.index.name = 'DATE'
fred_data
| GDP | UNRATE | |
|---|---|---|
| DATE | ||
| 1969-01-01 | 993.337 | 3.4 |
| 1969-02-01 | NaN | 3.4 |
| 1969-03-01 | NaN | 3.4 |
| 1969-04-01 | 1009.020 | 3.4 |
| 1969-05-01 | NaN | 3.4 |
| ... | ... | ... |
| 2022-05-01 | NaN | 3.6 |
| 2022-06-01 | NaN | 3.6 |
| 2022-07-01 | 25663.289 | 3.5 |
| 2022-08-01 | NaN | 3.7 |
| 2022-09-01 | NaN | 3.5 |
645 rows × 2 columns
pd.DataFrame(
{series:fred.get_series(
series,
observation_start = earliest_year
) for series in series}
)
| GDP | UNRATE | |
|---|---|---|
| 1969-01-01 | 993.337 | 3.4 |
| 1969-02-01 | NaN | 3.4 |
| 1969-03-01 | NaN | 3.4 |
| 1969-04-01 | 1009.020 | 3.4 |
| 1969-05-01 | NaN | 3.4 |
| ... | ... | ... |
| 2022-05-01 | NaN | 3.6 |
| 2022-06-01 | NaN | 3.6 |
| 2022-07-01 | 25663.289 | 3.5 |
| 2022-08-01 | NaN | 3.7 |
| 2022-09-01 | NaN | 3.5 |
645 rows × 2 columns
fred_data.index = fred_data.index.to_period('Q')
fred_data = fred_data.groupby("DATE").mean()
fred_data.index.name="PERIOD"
fred_data
| GDP | UNRATE | |
|---|---|---|
| PERIOD | ||
| 1969Q1 | 993.337 | 3.400000 |
| 1969Q2 | 1009.020 | 3.433333 |
| 1969Q3 | 1029.956 | 3.566667 |
| 1969Q4 | 1038.147 | 3.566667 |
| 1970Q1 | 1051.200 | 4.166667 |
| ... | ... | ... |
| 2021Q3 | 23550.420 | 5.100000 |
| 2021Q4 | 24349.121 | 4.233333 |
| 2022Q1 | 24740.480 | 3.800000 |
| 2022Q2 | 25248.476 | 3.600000 |
| 2022Q3 | 25663.289 | 3.566667 |
215 rows × 2 columns
fig = (
go.Figure()
.update_layout(
# Title and Subtitle
margin=dict(t=80, r=0, b=0, l=0),
title = dict(
text =
"Unemployment Rate over the Years" +
"<br><sup>" + "(Interactive Graph)" + "</sup>",
x = 0,
y = 0.95
),
# axes titles
xaxis_title = "Period",
yaxis_title = "Unemployment Rate",
hovermode = "x unified",
# legend
showlegend = True,
legend = dict(
groupclick="toggleitem",
orientation = 'h',
# positioning
x = 0,
xanchor = "left",
y = 1,
yanchor = "bottom",
font = dict(
size = 10
),
itemsizing = 'constant',
)
)
)
fig.add_trace(go.Scattergl(
x = fred_data.index.astype(str),
y = fred_data["UNRATE"],
name = "FRED (Actual)"
))
for i in spf_unrate["PREDICTION_QUARTER_LAG"].unique(): # 0-3
subset = spf_unrate[
spf_unrate["PREDICTION_QUARTER_LAG"] == i
]
fig.add_trace(go.Scattergl(
x = subset.index.astype(str),
y = subset["UNRATE"],
name = f"SPF Lag {i}",
# legendgroup = "SPF (Forecast)",
# legendgrouptitle_text="SPF (Forecast)"
))
fig.show(config = dict(
doubleClickDelay = 400,
displayModeBar = False,
showTips = False
))